library(ggpubr)
## Loading required package: ggplot2
## Loading required package: magrittr
library(tidyverse)
## ── Attaching packages ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ tibble 2.1.3 ✔ purrr 0.3.0
## ✔ tidyr 0.8.2 ✔ dplyr 0.8.3
## ✔ readr 1.3.1 ✔ stringr 1.3.1
## ✔ tibble 2.1.3 ✔ forcats 0.3.0
## ── Conflicts ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ tidyr::extract() masks magrittr::extract()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ purrr::set_names() masks magrittr::set_names()
library(readr)
library(gplots)
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
library(ggrepel)
library(ggthemes)
library(DT)
library(scales)
##
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
##
## discard
## The following object is masked from 'package:readr':
##
## col_factor
fh_acs <- read_rds('./fh_acs.rds')
fh_names <- read_rds('./fh_orig_tract.rds')
fh_acs <- fh_acs %>% left_join(fh_names, by = c('geoid'='fips_tract'))
place_vars <- names(fh_names)
fh_acs <- fh_acs %>% mutate(county_code =str_sub(geoid, 1, 5))
fh_acs <- fh_acs %>% mutate(state_code =str_sub(geoid, 1, 2))
fh_acs <- fh_acs %>% group_by(geoid) %>% slice(1) %>% ungroup() # grab only the unique geoid
fh_vars_all <- names(fh_acs)[grep('Crude', names(fh_acs))]
fh_acs[, fh_vars_all] <- fh_acs[, fh_vars_all]/100 # get it to fraction units
fh_vars <- names(fh_acs)[grep('CrudePrev', names(fh_acs))]
health_prefix <- strsplit(fh_vars, '_') %>% map(function(arr) {arr[[1]]}) %>% unlist()
health_prefix <- setdiff(health_prefix, 'COLON')
fh_vars <- c(setdiff(fh_vars, 'COLON_SCREEN_CrudePrev'), 'COLON_SCREEN_CrudePrev')
health_prefix <- c(health_prefix, 'COLON_SCREEN')
fh_95hi_vars <- sprintf('%s_Crude95CI_high', health_prefix)
fh_se_vars <- sprintf('%s_SE', health_prefix)
fh_sd_vars <- sprintf('%s_SD', health_prefix)
fh_acs <- fh_acs %>% mutate(total_population = total_males + total_females)
for(index in 1:length(fh_vars)) {
fh_acs[,fh_se_vars[index]] <- (fh_acs[,fh_95hi_vars[index]] - fh_acs[, fh_vars[index]])/1.96
fh_acs[,fh_sd_vars[index]] <- fh_acs[, fh_se_vars[index]] * sqrt(fh_acs[,'total_population'])
}
fh_behavior <- c('BINGE', 'SLEEP', 'CSMOKING')
fh_disease <- c('ARTHRITIS', 'CANCER', 'CASTHMA', 'CHD', 'COPD', 'DIABETES', 'KIDNEY', 'TEETHLOST', 'STROKE')
fh_risk <- c('OBESITY', 'BPHIGH', 'LPA', 'HIGHCHOL')
fh_access <- c('ACCESS2', 'BPMED', 'CHECKUP', 'CHOLSCREEN', 'COLON_SCREEN', 'COREM', 'COREW', 'MAMMOUSE', 'MHLTH', 'PHLTH', 'PAPTEST')
fh_behavior <- fh_vars[fh_behavior %>% map(grep, fh_vars) %>% unlist()]
fh_disease <- fh_vars[fh_disease %>% map(grep, fh_vars) %>% unlist()]
fh_risk <- fh_vars[fh_risk %>% map(grep, fh_vars) %>% unlist()]
fh_access <- fh_vars[fh_access %>% map(grep, fh_vars) %>% unlist()]
ses_vars <- c('gini_index')
ses_vars <- c(ses_vars, 'median_income')
ses_vars <- c(ses_vars, 'median_home_value')
ses_vars <- c(ses_vars, 'male_over_65_pct', 'female_over_65_pct')
ses_vars <- c(ses_vars, 'white_pct', 'african_american_pct', 'american_indian_pct', 'asian_pct', 'hawaiian_pacific_islander_pct', 'other_race_pct', 'two_or_more_race_pct')
ses_vars <- c(ses_vars, 'hispanic_pct', 'mexican_pct', 'puerto_rican_pct', 'cuban_pct', 'dominican_pct', 'central_american_pct', 'south_american_pct', 'other_hispanic_pct')
ses_vars <- c(ses_vars, 'less_than_high_school_pct', 'college_pct')
ses_vars <- c(ses_vars, 'at_below_poverty_pct', 'at_below_poverty_over_65_pct')
ses_vars <- c(ses_vars, 'unemployment_pct', 'non_employment_pct')
ses_vars <- c(ses_vars, 'more_than_one_occupant_per_room_pct', 'more_than_one_occupant_per_room_renter_pct')
ses_vars <- c(ses_vars, 'no_health_insurance_pct', 'no_health_insurance_over_65_pct')
established_disease <- c('CANCER_CrudePrev', 'ARTHRITIS_CrudePrev', 'STROKE_CrudePrev', 'CASTHMA_CrudePrev','COPD_CrudePrev', 'CHD_CrudePrev', 'DIABETES_CrudePrev', 'KIDNEY_CrudePrev', 'BPMED_CrudePrev')
established_risk <- c( 'CSMOKING_CrudePrev', 'BPHIGH_CrudePrev', 'OBESITY_CrudePrev', 'HIGHCHOL_CrudePrev')
established_demographics <- c('male_over_65_pct', 'female_over_65_pct')
established_disease_se <- c('CANCER_SE', 'ARTHRITIS_SE', 'STROKE_SE', 'CASTHMA_SE','COPD_SE', 'CHD_SE', 'DIABETES_SE', 'KIDNEY_SE', 'BPMED_SE')
established_risk_se <- c( 'CSMOKING_SE', 'BPHIGH_SE', 'OBESITY_SE', 'HIGHCHOL_SE')
filter_crudeprev <- function(arr) {arr[grep('CrudePrev',arr)]}
established_cols <- c(established_disease, established_risk, established_demographics)
non_na <- complete.cases(fh_acs[, established_cols])
fh_acs_established <- fh_acs[non_na, established_cols]
cr_established <- cor(fh_acs_established, use='pairwise.complete.obs')
new_names <- c('Cancer', 'Arthritis', 'Stroke', 'Chronic Asthma', 'COPD', 'Heart Disease', 'Diabetes', 'Kidney Disease', 'BP Medication', 'Smoking', 'High BP', 'Obesity', 'High Cholesterol', 'Male Over 65', 'Female Over 65')
colnames(cr_established) <- new_names
rownames(cr_established) <- new_names
heatmapColors <- function(numColors=16) {
c1 <- rainbow(numColors,v=seq(0.5,1,length=numColors),s=seq(1,0.3,length=numColors),start=4/6,end=4.0001/6);
c2 <- rainbow(numColors,v=seq(0.5,1,length=numColors),s=seq(1,0.3,length=numColors),start=1/6,end=1.0001/6);
c3 <- c(c1,rev(c2));
return(c3)
}
heatmap.2(cr_established, trace='none',margins = c(10, 10), col=heatmapColors(8))
fh_acs_established_long <- fh_acs_established
names(fh_acs_established_long) <- new_names
fh_acs_established_long <- fh_acs_established_long %>% gather('disease', 'prevalence')
fh_acs_established_long <- fh_acs_established_long %>% mutate(disease = fct_reorder(disease, prevalence, .fun='median'))
prevalence_p <- ggplot(fh_acs_established_long, aes(disease, prevalence*100))
prevalence_p <- prevalence_p + geom_violin()
prevalence_p <- prevalence_p + xlab('') + ylab('Prevalence') + coord_flip() + theme_fivethirtyeight() + theme(axis.title = element_text())
prevalence_p
pc_established <- prcomp(fh_acs_established,center=T, scale.=T)
summary(pc_established)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6
## Standard deviation 3.0352 1.9013 0.74619 0.67495 0.5519 0.50395
## Proportion of Variance 0.6141 0.2410 0.03712 0.03037 0.0203 0.01693
## Cumulative Proportion 0.6141 0.8551 0.89227 0.92264 0.9429 0.95988
## PC7 PC8 PC9 PC10 PC11 PC12
## Standard deviation 0.40954 0.38578 0.32818 0.25263 0.19010 0.18311
## Proportion of Variance 0.01118 0.00992 0.00718 0.00425 0.00241 0.00224
## Cumulative Proportion 0.97106 0.98098 0.98816 0.99241 0.99482 0.99706
## PC13 PC14 PC15
## Standard deviation 0.13607 0.11964 0.10621
## Proportion of Variance 0.00123 0.00095 0.00075
## Cumulative Proportion 0.99829 0.99925 1.00000
pc_established$rotation
## PC1 PC2 PC3 PC4
## CANCER_CrudePrev 0.1577109 -0.42669445 0.15277657 -0.32840478
## ARTHRITIS_CrudePrev 0.3001464 -0.12969807 0.20845306 -0.34292122
## STROKE_CrudePrev 0.3132249 0.09335877 -0.03905168 0.22532181
## CASTHMA_CrudePrev 0.1995424 0.29981442 0.62570127 -0.05789069
## COPD_CrudePrev 0.3030841 0.11966747 0.15935609 -0.05660718
## CHD_CrudePrev 0.3162948 -0.04589079 -0.08449624 0.10257123
## DIABETES_CrudePrev 0.2960469 0.12676442 -0.34258838 0.31029299
## KIDNEY_CrudePrev 0.3074523 0.07334282 -0.15073924 0.33973347
## BPMED_CrudePrev 0.2581304 -0.21748557 -0.19011604 -0.40666435
## CSMOKING_CrudePrev 0.2108770 0.35609321 0.19789948 -0.08868152
## BPHIGH_CrudePrev 0.3168123 0.02448109 -0.08518649 -0.03044563
## OBESITY_CrudePrev 0.2365472 0.30528754 -0.04671833 -0.09625068
## HIGHCHOL_CrudePrev 0.2699629 -0.21002118 -0.36095919 -0.19919700
## male_over_65_pct 0.1161608 -0.42473566 0.29560925 0.36943790
## female_over_65_pct 0.1387121 -0.41499353 0.25521051 0.36798511
## PC5 PC6 PC7 PC8
## CANCER_CrudePrev 0.14846679 -0.235337876 0.055569412 -0.344277673
## ARTHRITIS_CrudePrev 0.04567548 -0.034479049 0.018550976 -0.255515406
## STROKE_CrudePrev -0.06041283 -0.250590199 0.009403294 -0.022469779
## CASTHMA_CrudePrev -0.35911719 -0.325704442 0.123817556 0.160435034
## COPD_CrudePrev 0.45356479 0.006447686 -0.032061720 0.121248630
## CHD_CrudePrev 0.36829250 -0.139825968 -0.001741181 -0.053348364
## DIABETES_CrudePrev -0.18387917 -0.043445621 0.039298521 0.037578159
## KIDNEY_CrudePrev 0.01452565 -0.301561875 0.069966957 -0.044970043
## BPMED_CrudePrev -0.44448336 0.062004933 -0.118869367 0.503705352
## CSMOKING_CrudePrev 0.33666646 0.374241475 -0.143815297 0.370558652
## BPHIGH_CrudePrev -0.28456102 0.077400444 0.023898821 -0.211540320
## OBESITY_CrudePrev -0.19544569 0.552341836 -0.055234793 -0.523620289
## HIGHCHOL_CrudePrev 0.17104268 0.074799834 0.125861716 0.204664876
## male_over_65_pct -0.04987339 0.428053270 0.607508221 0.130550449
## female_over_65_pct -0.07776156 0.135389236 -0.741846174 0.008205357
## PC9 PC10 PC11 PC12
## CANCER_CrudePrev 0.071307027 -0.14580133 0.240751993 -0.357041822
## ARTHRITIS_CrudePrev 0.006421197 0.11204265 -0.689233066 0.027298568
## STROKE_CrudePrev 0.309752376 0.14186742 0.175675340 -0.143093253
## CASTHMA_CrudePrev -0.368834193 -0.06628172 0.057697884 0.100200042
## COPD_CrudePrev 0.175335046 0.14571681 0.259343342 0.594971840
## CHD_CrudePrev 0.135691908 -0.19453873 -0.136361508 0.159791047
## DIABETES_CrudePrev -0.056628773 0.09956962 -0.448608733 0.031799047
## KIDNEY_CrudePrev -0.069018422 -0.43889682 0.122903139 -0.227598754
## BPMED_CrudePrev 0.347527215 -0.27784524 0.042945303 0.108566630
## CSMOKING_CrudePrev 0.057066807 0.01796871 -0.069850853 -0.595964394
## BPHIGH_CrudePrev 0.099426826 0.66810572 0.272442588 -0.137146777
## OBESITY_CrudePrev -0.076856851 -0.37895495 0.158824277 0.146825304
## HIGHCHOL_CrudePrev -0.725745604 0.10439228 0.155477041 0.009247431
## male_over_65_pct 0.091128750 -0.03528235 -0.013123497 0.011828469
## female_over_65_pct -0.175864325 0.02024850 0.008836764 0.037020926
## PC13 PC14 PC15
## CANCER_CrudePrev 0.256869764 0.1279681862 0.410714404
## ARTHRITIS_CrudePrev -0.327092743 0.1314615377 -0.224681072
## STROKE_CrudePrev -0.639851666 -0.3265180364 0.300676402
## CASTHMA_CrudePrev 0.144253480 -0.1431692922 0.064919009
## COPD_CrudePrev -0.006238702 0.3962287422 0.116046846
## CHD_CrudePrev 0.385794107 -0.6494582570 -0.226126113
## DIABETES_CrudePrev 0.332947452 0.2061567049 0.524639313
## KIDNEY_CrudePrev -0.071915613 0.4422598753 -0.445890710
## BPMED_CrudePrev 0.028148897 0.0040517155 -0.053635163
## CSMOKING_CrudePrev 0.069997037 0.0216744247 0.036066733
## BPHIGH_CrudePrev 0.279119540 0.0196343569 -0.361914821
## OBESITY_CrudePrev -0.070871750 -0.0805907843 0.105234013
## HIGHCHOL_CrudePrev -0.205834701 -0.1223461124 0.035511738
## male_over_65_pct -0.022680182 -0.0026989706 -0.005232547
## female_over_65_pct -0.020059486 0.0006854468 0.004894587
plot(pc_established$x[, 1], fh_acs_established[['BPHIGH_CrudePrev']])
plot(pc_established$x[, 1], fh_acs_established[['BPMED_CrudePrev']])
plot(pc_established$x[, 1], fh_acs_established[['OBESITY_CrudePrev']])
plot(pc_established$x[, 1], fh_acs_established[['CASTHMA_CrudePrev']])
plot(pc_established$x[, 1], fh_acs_established[['ARTHRITIS_CrudePrev']])
plot(pc_established$x[, 2], fh_acs_established[[ 'CANCER_CrudePrev']])
fh_acs_established$pc_1 <- as.numeric(pc_established$x[, 1])
fh_acs_established$pc_2 <- as.numeric(pc_established$x[, 2])
# now build a database of scores
# PC1 of all
# single
# age
placenames_cols <- c("stateabbr","placename" , "county_code", "state_code", "geoid","fips_place_tract","lat","long")
fh_acs_scoring <- fh_acs_established %>% cbind(fh_acs[non_na, placenames_cols])
fh_acs_scoring <- fh_acs_scoring %>% mutate(avg_over_65_pct = (male_over_65_pct+female_over_65_pct)/2)
fh_acs_scoring <- fh_acs_scoring %>% mutate(risk_score_pc_1=rescale(pc_1, to=c(0,100)))
fh_acs_scoring <- fh_acs_scoring %>% mutate(risk_score_pc_2=rescale(-pc_2, to=c(0,100))) # flip sign of pc2
fh_acs_scoring <- fh_acs_scoring %>% mutate(risk_score_both_pc=rescale(risk_score_pc_1*.61 + risk_score_pc_2*.24, to=c(0,100))) # rescale according the the contribution of the PC
fh_acs_scoring <- fh_acs_scoring %>% mutate(risk_score_all=rowSums(.[established_cols])) %>% mutate(risk_score_all=rescale(risk_score_all, to=c(0,100)))
p <- ggplot(fh_acs_scoring, aes(risk_score_pc_1, risk_score_pc_2))
p <- p + geom_point(alpha=0.5, color='gray')
p
p <- ggplot(fh_acs_scoring, aes(risk_score_pc_1, risk_score_all))
p <- p + geom_point(alpha=0.5, color='gray')
p
p <- ggplot(fh_acs_scoring, aes(risk_score_both_pc, risk_score_pc_1))
p <- p + geom_point(alpha=0.5, color='gray')
p
p <- ggplot(fh_acs_scoring, aes(risk_score_both_pc, risk_score_pc_2))
p <- p + geom_point(alpha=0.5, color='gray')
p
p <- ggplot(fh_acs_scoring, aes(risk_score_both_pc, CANCER_CrudePrev))
p <- p + geom_point(alpha=0.5, color='gray')
p
p <- ggplot(fh_acs_scoring, aes(risk_score_both_pc, DIABETES_CrudePrev))
p <- p + geom_point(alpha=0.5, color='gray')
p
p <- ggplot(fh_acs_scoring, aes(risk_score_both_pc, CHD_CrudePrev))
p <- p + geom_point(alpha=0.5, color='gray')
p
p <- ggplot(fh_acs_scoring, aes(risk_score_both_pc, CHD_CrudePrev))
p <- p + geom_point(alpha=0.5, color='gray')
p
p <- ggplot(fh_acs_scoring, aes(risk_score_both_pc, avg_over_65_pct))
p <- p + geom_point(alpha=0.5, color='gray')
p
ses_vars_to_bind <- setdiff(ses_vars, names(fh_acs_scoring))
fh_all <- cbind(fh_acs_scoring, fh_acs[non_na, ses_vars_to_bind])
top_per_state <- fh_acs_scoring %>% group_by(stateabbr) %>% top_n(1,risk_score_both_pc) %>% ungroup() %>% filter(risk_score_both_pc >= 60)
# top census tracts per state based on age, but filtered for those that have a high risk score
oldest_tracts <- fh_acs_scoring %>% group_by(stateabbr) %>% top_n(1,avg_over_65_pct) %>% ungroup() %>% filter(avg_over_65_pct >= .50)
top_both <- fh_acs_scoring %>% filter(risk_score_both_pc >= 60 & avg_over_65_pct >= .50)
p <- ggplot(fh_acs_scoring, aes(I(100*avg_over_65_pct), risk_score_both_pc))
p <- p + geom_point(alpha=0.5, color='gray')
p <- p + geom_point(data=top_per_state, aes(I(100*avg_over_65_pct), risk_score_both_pc))
p <- p + geom_text_repel(data=top_per_state, aes(I(100*avg_over_65_pct), risk_score_both_pc, label=paste(placename, stateabbr)), size=3)
p <- p + geom_point(data=oldest_tracts, aes(I(100*avg_over_65_pct), risk_score_both_pc), color='red')
p <- p + geom_text_repel(data=oldest_tracts, aes(I(100*avg_over_65_pct), risk_score_both_pc, label=paste(placename, stateabbr)),color='red', size=3)
p <- p + theme_fivethirtyeight() + theme(axis.title = element_text(), legend.position = 'none') + labs(x = '% Age over 65', y = '')
p
top_score <- fh_all %>% group_by(stateabbr) %>% top_n(2, risk_score_both_pc) %>% filter(risk_score_both_pc >= 60)
p_income <- ggplot(fh_all, aes(median_income, risk_score_both_pc))
p_income <- p_income + geom_point(alpha=0.5, color='gray')
p_income <- p_income + geom_point(data=top_score, aes(median_income, risk_score_both_pc))
p_income <- p_income + geom_text_repel(data=top_score, aes(median_income, risk_score_both_pc, label=paste(placename, stateabbr)),color='red', size=3)
p_income <- p_income + theme_fivethirtyeight() + theme(axis.title = element_text(), legend.position = 'none') + labs(x = 'Median Income per Tract', y = '')
p_income
## Warning: Removed 110 rows containing missing values (geom_point).
## Warning: Removed 3 rows containing missing values (geom_point).
## Warning: Removed 3 rows containing missing values (geom_text_repel).
p_poverty <- ggplot(fh_all, aes(I(100*at_below_poverty_pct), risk_score_both_pc))
p_poverty <- p_poverty + geom_point(alpha=0.5, color='gray')
p_poverty <- p_poverty + geom_point(data=top_score, aes(I(100*at_below_poverty_pct), risk_score_both_pc))
p_poverty <- p_poverty + geom_text_repel(data=top_score, aes(I(100*at_below_poverty_pct), risk_score_both_pc, label=paste(placename, stateabbr)),color='red', size=3)
high_poverty <- fh_all %>% group_by(stateabbr) %>% top_n(1, at_below_poverty_pct) %>% filter(at_below_poverty_pct >= .60, risk_score_both_pc > 50)
p_poverty <- p_poverty + geom_point(data=high_poverty, aes(I(100*at_below_poverty_pct), risk_score_both_pc))
p_poverty <- p_poverty + geom_text_repel(data=high_poverty, aes(I(100*at_below_poverty_pct), risk_score_both_pc, label=paste(placename, stateabbr)),color='red', size=3)
p_poverty <- p_poverty + theme_fivethirtyeight() + theme(axis.title = element_text(), legend.position = 'none') + labs(x = '% Below Poverty per Tract', y = '')
p_poverty
## Warning: Removed 44 rows containing missing values (geom_point).
p_num_people <- ggplot(fh_all, aes(I(100*more_than_one_occupant_per_room_pct), risk_score_both_pc))
p_num_people <- p_num_people + geom_point(alpha=0.5, color='gray')
p_num_people <- p_num_people + geom_point(data=top_score, aes(I(100*more_than_one_occupant_per_room_pct), risk_score_both_pc))
p_num_people <- p_num_people + geom_text_repel(data=top_score, aes(I(100*more_than_one_occupant_per_room_pct), risk_score_both_pc, label=paste(placename, stateabbr)),color='red', size=3)
#high_num_people_room <- fh_all %>% group_by(stateabbr) %>% top_n(1, risk_score_both_pc) %>% filter(more_than_one_occupant_per_room_pct > .4, risk_score_both_pc > 50) %>% ungroup()
#p_num_people <- p_num_people + geom_point(data=high_num_people_room, aes(I(100*more_than_one_occupant_per_room_pct), risk_score_both_pc))
#p_num_people <- p_num_people + geom_text_repel(data=high_num_people_room, aes(I(100*more_than_one_occupant_per_room_pct), risk_score_both_pc, label=paste(placename, stateabbr)),color='red', size=3)
p_num_people <- p_num_people + theme_fivethirtyeight() + theme(axis.title = element_text(), legend.position = 'none') + labs(x = '% of households with >1 person/room', y = '')
p_num_people
## Warning: Removed 62 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_text_repel).
fh_acs_scoring <- fh_acs_scoring %>% cbind(fh_acs[non_na, c('total_males', 'total_females')])
fh_acs_scoring <- fh_acs_scoring %>% mutate(risk_score = risk_score_both_pc)
fh_acs_scoring <- fh_acs_scoring %>% group_split(placename) %>% map(function(.x) { .x %>% mutate(rank_in_city=rank(-risk_score)) }) %>% bind_rows()
fh_acs_scoring <- fh_acs_scoring %>% group_split(stateabbr) %>% map(function(.x) { .x %>% mutate(rank_in_state=rank(-risk_score)) }) %>% bind_rows()
write_rds(fh_acs_scoring, path = "fh_acs_covid_comm_score.rds")
write_csv(fh_acs_scoring, path="fh_acs_covid_comm_score.csv")
fh_acs_scoring_city <- fh_acs_scoring %>% select(-c(pc_1, pc_2, risk_score_pc_1, risk_score_pc_2, risk_score, risk_score_both_pc, risk_score_all, lat,long, geoid, fips_place_tract)) %>% unite(place_state, placename, stateabbr, sep="|")
fh_acs_scoring_city <- fh_acs_scoring_city %>% mutate(total_population = total_males + total_females)
cols_to_avg <- c(established_disease, established_risk)
cols_se <- c(established_disease_se, established_risk_se)
fh_acs_scoring_city <- fh_acs_scoring_city %>% cbind(fh_acs[non_na, c(established_disease_se, established_risk_se)])
cols_to_total <- c("male_over_65_pct","female_over_65_pct","avg_over_65_pct")
cols_total <- c("male_over_65_count","female_over_65_count","avg_over_65_count")
for(i in 1:length(cols_to_total)) {
fh_acs_scoring_city <- fh_acs_scoring_city %>% mutate(!!cols_total[i] := .data[[cols_to_total[i]]]*total_population)
}
fh_acs_scoring_by_city <- fh_acs_scoring_city %>% group_by(place_state) %>% summarise_at(c(cols_total, 'total_population'), sum)
cols_to_prev <- c("male_over_65_pct","female_over_65_pct","avg_over_65_pct")
for(i in 1:length(cols_total)) {
fh_acs_scoring_by_city <- fh_acs_scoring_by_city %>% mutate(!!cols_to_prev[i] := .data[[cols_total[i]]]/total_population)
}
## now take weighted prevalence average
weighted_prev_avg <- function(d, prefix_col) {
# d is grouped by frame for a city or state
prev_col <- sprintf('%s_CrudePrev', prefix_col)
se_col <- sprintf('%s_SE', prefix_col)
ind <- complete.cases(d[, c(prev_col, se_col)])
d <- d[ind,]
weighted.mean(d[[prev_col]], 1/(d[[se_col]]+0.0005)) # note the fudge factor
}
weighted_prev_avgs <- function(d, prefix_cols) {
avgs <- prefix_cols %>% map_dbl(~weighted_prev_avg(d, .x))
place<- d[['place_state']][1]
tibble(disease=prefix_cols, prevalence=avgs, place_state=place)
}
prefixes <- c("CANCER", "ARTHRITIS", "STROKE","CASTHMA", "COPD", "CHD", "DIABETES","KIDNEY", "BPMED", "CSMOKING", "BPHIGH","OBESITY","HIGHCHOL" )
fh_acs_scoring_prevs_by_city <- fh_acs_scoring_city %>% group_by(place_state) %>% group_split(keep=TRUE) %>% map_df(~weighted_prev_avgs(.x, prefixes))
fh_acs_scoring_prevs_by_city$disease <- sprintf('%s_CrudePrev', fh_acs_scoring_prevs_by_city$disease)
fh_acs_scoring_prevs_by_city <- fh_acs_scoring_prevs_by_city %>% spread(disease, prevalence)
fh_acs_scoring_by_city <- fh_acs_scoring_by_city %>% left_join(fh_acs_scoring_prevs_by_city, by='place_state')
pc_by_city <- prcomp(fh_acs_scoring_by_city[,established_cols],center=T, scale.=T)
summary(pc_by_city)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6
## Standard deviation 3.0342 1.7703 0.97630 0.71648 0.53455 0.5020
## Proportion of Variance 0.6138 0.2089 0.06354 0.03422 0.01905 0.0168
## Cumulative Proportion 0.6138 0.8227 0.88624 0.92046 0.93951 0.9563
## PC7 PC8 PC9 PC10 PC11 PC12
## Standard deviation 0.42483 0.37164 0.3464 0.25315 0.22144 0.19245
## Proportion of Variance 0.01203 0.00921 0.0080 0.00427 0.00327 0.00247
## Cumulative Proportion 0.96834 0.97755 0.9856 0.98982 0.99309 0.99556
## PC13 PC14 PC15
## Standard deviation 0.17249 0.14290 0.12805
## Proportion of Variance 0.00198 0.00136 0.00109
## Cumulative Proportion 0.99755 0.99891 1.00000
pc_by_city$rotation
## PC1 PC2 PC3 PC4
## CANCER_CrudePrev -0.1513625 -0.46234141 0.22821155 -0.11472234
## ARTHRITIS_CrudePrev -0.2940666 -0.10042677 0.32428060 -0.15763331
## STROKE_CrudePrev -0.3107515 0.09672029 -0.09092279 0.25688009
## CASTHMA_CrudePrev -0.1934841 0.21545877 0.63368651 0.23624056
## COPD_CrudePrev -0.3097551 0.05194417 0.15298565 0.07532731
## CHD_CrudePrev -0.3175390 -0.03559995 -0.07979953 0.14232241
## DIABETES_CrudePrev -0.2680392 0.19977554 -0.41492573 0.15166847
## KIDNEY_CrudePrev -0.2902240 0.10601242 -0.23259744 0.45053133
## BPMED_CrudePrev -0.2651378 -0.14699842 -0.08255760 -0.53277744
## CSMOKING_CrudePrev -0.2626526 0.24489948 0.24498121 -0.12100572
## BPHIGH_CrudePrev -0.3058207 0.07837667 -0.12820089 -0.16401360
## OBESITY_CrudePrev -0.2378426 0.31547582 0.02046824 -0.30149445
## HIGHCHOL_CrudePrev -0.2606948 -0.15744218 -0.30352090 -0.27935332
## male_over_65_pct -0.1434706 -0.48444226 -0.01096777 0.21290136
## female_over_65_pct -0.1607230 -0.46748113 0.03110201 0.22050634
## PC5 PC6 PC7 PC8
## CANCER_CrudePrev 0.08392488 -0.07347842 0.25553415 -0.46669281
## ARTHRITIS_CrudePrev 0.10658331 -0.10064919 0.22599308 -0.12733324
## STROKE_CrudePrev -0.11393147 0.10683646 -0.13890118 -0.14884583
## CASTHMA_CrudePrev -0.34373332 -0.47370433 -0.04433591 0.13814565
## COPD_CrudePrev 0.33440598 0.16857389 -0.26693587 -0.07595834
## CHD_CrudePrev 0.27360824 0.12780107 0.02874123 -0.28674742
## DIABETES_CrudePrev -0.18302466 -0.09581671 0.09538506 -0.02628227
## KIDNEY_CrudePrev -0.08977368 -0.09208268 0.08173772 -0.22918005
## BPMED_CrudePrev -0.55368038 0.12104692 -0.36420795 -0.23807871
## CSMOKING_CrudePrev 0.31811780 0.34094732 -0.35361984 0.23671445
## BPHIGH_CrudePrev -0.20110174 -0.01062993 0.01577419 0.32652656
## OBESITY_CrudePrev -0.01310907 0.24021625 0.70054193 0.16737612
## HIGHCHOL_CrudePrev 0.38921737 -0.65747628 -0.10373285 0.24712102
## male_over_65_pct -0.06690911 0.19134865 0.12225284 0.38076066
## female_over_65_pct -0.13370052 0.16925387 -0.01007635 0.36074883
## PC9 PC10 PC11 PC12
## CANCER_CrudePrev 0.072870493 -0.33914832 0.27547172 -0.23406289
## ARTHRITIS_CrudePrev 0.241497931 0.67528027 0.12988256 0.28288316
## STROKE_CrudePrev 0.212762616 -0.23073121 0.25143581 0.17967364
## CASTHMA_CrudePrev -0.145995946 -0.10206475 -0.12926353 -0.06412544
## COPD_CrudePrev 0.219679729 -0.10294665 -0.65236875 -0.05495016
## CHD_CrudePrev -0.099853188 0.05922997 -0.18073711 -0.13243167
## DIABETES_CrudePrev 0.005254134 0.43398080 -0.01451853 -0.22891985
## KIDNEY_CrudePrev -0.265574072 -0.13103901 0.16133076 0.17389213
## BPMED_CrudePrev -0.261258660 0.02080074 -0.15209884 0.09552718
## CSMOKING_CrudePrev -0.276916937 0.02510667 0.50793699 -0.02121700
## BPHIGH_CrudePrev 0.681418713 -0.23231460 0.14031844 -0.10110595
## OBESITY_CrudePrev -0.259482972 -0.20680412 -0.16221918 -0.04383570
## HIGHCHOL_CrudePrev -0.191505842 -0.11908380 0.00315688 0.03989486
## male_over_65_pct -0.087832405 -0.06557378 -0.12377680 0.59321163
## female_over_65_pct -0.141449238 0.17152905 0.01893415 -0.59193355
## PC13 PC14 PC15
## CANCER_CrudePrev 0.301953414 0.08927087 0.22890783
## ARTHRITIS_CrudePrev -0.237040114 -0.12640264 0.03568674
## STROKE_CrudePrev -0.486543461 0.56781851 0.02883620
## CASTHMA_CrudePrev 0.127599997 0.10395957 -0.12462931
## COPD_CrudePrev -0.003373466 -0.07894889 0.40162146
## CHD_CrudePrev 0.122546485 0.04612681 -0.78498727
## DIABETES_CrudePrev 0.470687683 0.32485392 0.26935319
## KIDNEY_CrudePrev -0.092139236 -0.62623112 0.16457916
## BPMED_CrudePrev -0.034577843 -0.06229959 -0.02928294
## CSMOKING_CrudePrev 0.236746526 -0.02143721 0.07471927
## BPHIGH_CrudePrev 0.157695741 -0.31052596 -0.20015988
## OBESITY_CrudePrev -0.165828195 0.07669304 0.06497547
## HIGHCHOL_CrudePrev -0.145504597 0.08690142 0.01766287
## male_over_65_pct 0.318050339 0.13286900 -0.01495440
## female_over_65_pct -0.350178782 -0.07011795 0.03949111
fh_acs_scoring_by_city$pc_1 <- as.numeric(pc_by_city$x[, 1])
fh_acs_scoring_by_city$pc_2 <- as.numeric(pc_by_city$x[, 2])
fh_acs_scoring_by_city <- fh_acs_scoring_by_city %>% mutate(risk_score_pc_1=as.numeric(rescale(-pc_1, to=c(0,100)))) # flip the sign
fh_acs_scoring_by_city <- fh_acs_scoring_by_city %>% mutate(risk_score_pc_2=as.numeric(rescale(-pc_2, to=c(0,100))))
fh_acs_scoring_by_city <- fh_acs_scoring_by_city %>% mutate(risk_score_both_pc=rescale(risk_score_pc_1*.61 + risk_score_pc_2*.24, to=c(0,100))) # rescale according the the contribution of the PC
fh_acs_scoring_by_city <- fh_acs_scoring_by_city %>% mutate(risk_score_all=rowSums(.[established_cols])) %>% mutate(risk_score_all=as.numeric(rescale(risk_score_all, to=c(0,100))))
plot(fh_acs_scoring_by_city$risk_score_pc_1, fh_acs_scoring_by_city$risk_score_all)
plot(fh_acs_scoring_by_city$risk_score_pc_2, fh_acs_scoring_by_city$risk_score_all)
# add stateabbr
plot(fh_acs_scoring_by_city$risk_score_both_pc, fh_acs_scoring_by_city$avg_over_65_pct)
plot(fh_acs_scoring_by_city$risk_score_both_pc, fh_acs_scoring_by_city$DIABETES_CrudePrev)
fh_acs_scoring_by_city <- fh_acs_scoring_by_city %>% separate(place_state, c("placename", "stateabbr"), sep="\\|")
top_per_state <- fh_acs_scoring_by_city %>% group_by(stateabbr) %>% top_n(5,risk_score_pc_1) %>% ungroup()
DT::datatable(top_per_state %>% select(stateabbr, placename, risk_score_pc_1, risk_score_both_pc, risk_score_all))
fh_acs_scoring_by_city_distribute <- fh_acs_scoring_by_city %>% select(c(established_cols, "placename", "stateabbr", "risk_score_both_pc", "risk_score_pc_1", "risk_score_pc_2", "risk_score_all", "total_population"))
fh_acs_scoring_by_city_distribute <- fh_acs_scoring_by_city_distribute %>% mutate(risk_score = risk_score_both_pc)
fh_acs_scoring_by_city_distribute <- fh_acs_scoring_by_city_distribute %>% group_split(stateabbr) %>% map(function(.x) { .x %>% mutate(rank_in_state=rank(-risk_score)) }) %>% bind_rows()
write_rds(fh_acs_scoring_by_city, path = "fh_acs_covid_city_comm_score.rds")
write_csv(fh_acs_scoring_by_city_distribute, path="fh_acs_covid_city_comm_score.csv")
fh_acs_scoring_county <- fh_acs_scoring %>% select(-c(pc_1, pc_2, risk_score_pc_1, risk_score_pc_2, risk_score_all, risk_score_both_pc, lat,long, geoid, fips_place_tract)) %>% mutate(total_population = total_males + total_females)
location_info <- fh_acs_scoring %>% select(county_code, stateabbr) %>% unique() # go up a hierarchy
fh_acs_scoring_county <- fh_acs_scoring_county %>% cbind(fh_acs[non_na, c(established_disease_se, established_risk_se)])
county_counts <- fh_acs_scoring_county %>% group_by(county_code) %>% summarize(n=n())
fh_acs_scoring_county <- fh_acs_scoring_county %>% left_join(county_counts) %>% filter(n > 1)
## Joining, by = "county_code"
for(i in 1:length(cols_to_total)) {
fh_acs_scoring_county <- fh_acs_scoring_county %>% mutate(!!cols_total[i] := .data[[cols_to_total[i]]]*total_population)
}
fh_acs_scoring_by_county <- fh_acs_scoring_county %>% group_by(county_code) %>% summarise_at(c(cols_total, 'total_population'), sum)
for(i in 1:length(cols_total)) {
fh_acs_scoring_by_county <- fh_acs_scoring_by_county %>% mutate(!!cols_to_prev[i] := .data[[cols_total[i]]]/total_population)
}
weighted_county_code_prev_avgs <- function(d, prefix_cols) {
avgs <- prefix_cols %>% map_dbl(~weighted_prev_avg(d, .x))
place<- d[['county_code']][1]
tibble(disease=prefix_cols, prevalence=avgs, county_code=place)
}
fh_acs_scoring_prevs_by_county <- fh_acs_scoring_county %>% group_by(county_code) %>% group_split(keep=TRUE) %>% map_df(~weighted_county_code_prev_avgs(.x, prefixes))
fh_acs_scoring_prevs_by_county$disease <- sprintf('%s_CrudePrev', fh_acs_scoring_prevs_by_county$disease)
fh_acs_scoring_prevs_by_county <- fh_acs_scoring_prevs_by_county %>% spread(disease, prevalence)
fh_acs_scoring_by_county <- fh_acs_scoring_by_county %>% left_join(fh_acs_scoring_prevs_by_county, by='county_code')
heatmap.2(cor(fh_acs_scoring_by_county[,established_cols]))
pc_by_county <- prcomp(fh_acs_scoring_by_county[,established_cols],center=T, scale.=T)
summary(pc_by_county)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6
## Standard deviation 3.0683 1.6631 0.93637 0.74657 0.56938 0.55327
## Proportion of Variance 0.6276 0.1844 0.05845 0.03716 0.02161 0.02041
## Cumulative Proportion 0.6276 0.8120 0.87048 0.90763 0.92925 0.94965
## PC7 PC8 PC9 PC10 PC11 PC12
## Standard deviation 0.47853 0.40564 0.33632 0.27247 0.2479 0.21505
## Proportion of Variance 0.01527 0.01097 0.00754 0.00495 0.0041 0.00308
## Cumulative Proportion 0.96492 0.97589 0.98343 0.98838 0.9925 0.99556
## PC13 PC14 PC15
## Standard deviation 0.17598 0.14248 0.12373
## Proportion of Variance 0.00206 0.00135 0.00102
## Cumulative Proportion 0.99763 0.99898 1.00000
pc_by_county$rotation
## PC1 PC2 PC3 PC4
## CANCER_CrudePrev -0.1666402 -0.46710898 -0.12727428 0.23758525
## ARTHRITIS_CrudePrev -0.2902344 -0.10349410 -0.27079042 0.30362465
## STROKE_CrudePrev -0.3065678 0.10300399 0.03877146 -0.30512916
## CASTHMA_CrudePrev -0.1906044 0.19933650 -0.71060290 -0.12545798
## COPD_CrudePrev -0.3050213 0.05025504 -0.16770384 -0.02522198
## CHD_CrudePrev -0.3123990 -0.03988041 0.06446970 -0.13628887
## DIABETES_CrudePrev -0.2784138 0.19377115 0.32685305 -0.25976200
## KIDNEY_CrudePrev -0.2920983 0.09514468 0.14000965 -0.46110621
## BPMED_CrudePrev -0.2712243 -0.08809253 0.19144072 0.34971923
## CSMOKING_CrudePrev -0.2553653 0.24179824 -0.25999076 0.15589505
## BPHIGH_CrudePrev -0.2972654 0.10760894 0.17220335 0.08043558
## OBESITY_CrudePrev -0.2349413 0.30978832 0.09604647 0.32432325
## HIGHCHOL_CrudePrev -0.2688750 -0.13348753 0.30635232 0.30006038
## male_over_65_pct -0.1427766 -0.50051949 -0.03218733 -0.21389231
## female_over_65_pct -0.1708718 -0.47741351 -0.08535878 -0.22173714
## PC5 PC6 PC7 PC8
## CANCER_CrudePrev -0.217197418 0.26148317 -0.10081538 0.42762361
## ARTHRITIS_CrudePrev -0.113200438 0.09646505 0.16711323 0.06266107
## STROKE_CrudePrev -0.009377157 -0.01500191 -0.15709016 0.09305968
## CASTHMA_CrudePrev -0.434083544 -0.27028419 0.13168005 -0.07095784
## COPD_CrudePrev 0.209145940 0.28647467 -0.13146871 -0.22161814
## CHD_CrudePrev 0.051017308 0.34623163 -0.11330264 0.20334670
## DIABETES_CrudePrev -0.137591318 -0.08406118 0.06437783 0.03313377
## KIDNEY_CrudePrev -0.172878229 0.09849564 -0.04250791 0.21406198
## BPMED_CrudePrev -0.097418752 -0.56310734 -0.58525826 0.09706909
## CSMOKING_CrudePrev 0.527681173 0.16012193 -0.30666530 -0.21809934
## BPHIGH_CrudePrev -0.027674320 -0.27840922 0.20824528 -0.35326231
## OBESITY_CrudePrev 0.309981549 -0.13935792 0.52767195 0.49132355
## HIGHCHOL_CrudePrev -0.344718169 0.30091911 0.21312741 -0.46981273
## male_over_65_pct 0.324901371 -0.23090688 0.26346865 -0.12385887
## female_over_65_pct 0.217135134 -0.21633114 0.10298491 -0.05370117
## PC9 PC10 PC11 PC12
## CANCER_CrudePrev -0.160096403 0.42065782 -0.090210874 0.065688156
## ARTHRITIS_CrudePrev -0.262912439 -0.61250516 -0.385182227 -0.050418232
## STROKE_CrudePrev -0.176366306 0.26316364 -0.147829009 -0.207427780
## CASTHMA_CrudePrev 0.212887732 0.09933891 0.146680817 -0.020580787
## COPD_CrudePrev -0.283817815 -0.10392786 0.681535675 0.049684335
## CHD_CrudePrev -0.001090552 -0.16300058 0.143841559 0.012083721
## DIABETES_CrudePrev 0.029372901 -0.35102120 -0.125031911 0.248568010
## KIDNEY_CrudePrev 0.185609722 0.08170092 -0.116066313 -0.140859362
## BPMED_CrudePrev 0.127861127 -0.12190631 0.179796674 -0.096108903
## CSMOKING_CrudePrev 0.290501158 0.16103908 -0.430378527 0.021049794
## BPHIGH_CrudePrev -0.580849785 0.35365074 -0.149304675 0.110814705
## OBESITY_CrudePrev 0.176941340 0.12613164 0.206371197 0.006478071
## HIGHCHOL_CrudePrev 0.446212140 0.11483896 0.053395650 -0.074216004
## male_over_65_pct 0.061794076 -0.07979618 0.023375429 -0.607072615
## female_over_65_pct 0.201480946 0.01310754 -0.003369048 0.685130788
## PC13 PC14 PC15
## CANCER_CrudePrev -0.29982048 0.20020110 0.166767159
## ARTHRITIS_CrudePrev 0.29137577 -0.01775561 0.071809041
## STROKE_CrudePrev 0.45430470 0.45389030 -0.435315813
## CASTHMA_CrudePrev -0.17126174 -0.02650641 -0.110546878
## COPD_CrudePrev 0.06989649 0.19403670 0.287434518
## CHD_CrudePrev -0.24054627 -0.52407160 -0.567008450
## DIABETES_CrudePrev -0.52494052 0.44582495 0.059742263
## KIDNEY_CrudePrev 0.22905717 -0.34963881 0.580545810
## BPMED_CrudePrev 0.04540168 -0.07655242 0.022340101
## CSMOKING_CrudePrev -0.18419399 0.01636916 0.092399450
## BPHIGH_CrudePrev -0.12971666 -0.32045102 0.030812750
## OBESITY_CrudePrev 0.08756811 0.03935685 -0.003459127
## HIGHCHOL_CrudePrev 0.12946752 0.07607271 -0.085251772
## male_over_65_pct -0.23730476 0.05099024 0.022145883
## female_over_65_pct 0.25524112 -0.02870006 -0.053249529
fh_acs_scoring_by_county$pc_1 <- as.numeric(pc_by_county$x[, 1])
fh_acs_scoring_by_county$pc_2 <- as.numeric(pc_by_county$x[, 2])
fh_acs_scoring_by_county <- fh_acs_scoring_by_county %>% mutate(risk_score_pc_1=as.numeric(rescale(-pc_1,to=c(0,100))))
fh_acs_scoring_by_county <- fh_acs_scoring_by_county %>% mutate(risk_score_pc_2=as.numeric(rescale(-pc_2,to=c(0,100))))
fh_acs_scoring_by_county <- fh_acs_scoring_by_county %>% mutate(risk_score_both_pc=rescale(risk_score_pc_1*.61 + risk_score_pc_2*.24, to=c(0,100))) # rescale according the the contribution of the PC
fh_acs_scoring_by_county <- fh_acs_scoring_by_county %>% mutate(risk_score_all=rowSums(.[established_cols])) %>% mutate(risk_score_all=as.numeric(rescale(risk_score_all, to=c(0,100))))
plot(fh_acs_scoring_by_county$risk_score_pc_1, fh_acs_scoring_by_county$risk_score_all)
plot(fh_acs_scoring_by_county$risk_score_pc_2, fh_acs_scoring_by_county$risk_score_all)
fh_acs_scoring_by_county_distribute <- fh_acs_scoring_by_county %>% select(c(established_cols, "county_code", "risk_score_both_pc", "risk_score_pc_1", "risk_score_pc_2", "risk_score_all", "total_population"))
fh_acs_scoring_by_county_distribute <- fh_acs_scoring_by_county_distribute %>% mutate(risk_score = risk_score_both_pc)
fh_acs_scoring_by_county_distribute <- fh_acs_scoring_by_county_distribute %>% left_join(location_info)
## Joining, by = "county_code"
fh_acs_scoring_by_county_distribute <- fh_acs_scoring_by_county_distribute %>% group_split(stateabbr) %>% map(function(.x) { .x %>% mutate(rank_in_state=rank(-risk_score)) }) %>% bind_rows()
DT::datatable(fh_acs_scoring_by_county_distribute %>% select(county_code, stateabbr, risk_score, risk_score_pc_1, risk_score_all))
write_rds(fh_acs_scoring_by_county, path = "fh_acs_covid_county_comm_score.rds")
write_csv(fh_acs_scoring_by_county_distribute, path="fh_acs_covid_county_comm_score.csv")
fh_acs_scoring_state <- fh_acs_scoring %>% select(-c(pc_1, pc_2, risk_score_pc_1, risk_score_pc_2, risk_score_all, risk_score_both_pc, lat,long, geoid, fips_place_tract)) %>% mutate(total_population = total_males + total_females)
fh_acs_scoring_state <- fh_acs_scoring_state %>% cbind(fh_acs[non_na, c(established_disease_se, established_risk_se)])
cols_to_total <- c("male_over_65_pct","female_over_65_pct","avg_over_65_pct")
cols_total <- c("male_over_65_count","female_over_65_count","avg_over_65_count")
for(i in 1:length(cols_to_total)) {
fh_acs_scoring_state <- fh_acs_scoring_state %>% mutate(!!cols_total[i] := .data[[cols_to_total[i]]]*total_population)
}
fh_acs_scoring_by_state <- fh_acs_scoring_state %>% group_by(stateabbr) %>% summarise_at(c(cols_total, 'total_population'), sum)
cols_to_prev <- c("male_over_65_pct","female_over_65_pct","avg_over_65_pct")
for(i in 1:length(cols_total)) {
fh_acs_scoring_by_state <- fh_acs_scoring_by_state %>% mutate(!!cols_to_prev[i] := .data[[cols_total[i]]]/total_population)
}
weighted_state_prev_avgs <- function(d, prefix_cols) {
avgs <- prefix_cols %>% map_dbl(~weighted_prev_avg(d, .x))
place<- d[['stateabbr']][1]
tibble(disease=prefix_cols, prevalence=avgs, stateabbr=place)
}
fh_acs_scoring_prevs_by_state <- fh_acs_scoring_state %>% group_by(stateabbr) %>% group_split(keep=TRUE) %>% map_df(~weighted_state_prev_avgs(.x, prefixes))
fh_acs_scoring_prevs_by_state$disease <- sprintf('%s_CrudePrev', fh_acs_scoring_prevs_by_state$disease)
fh_acs_scoring_prevs_by_state <- fh_acs_scoring_prevs_by_state %>% spread(disease, prevalence)
fh_acs_scoring_by_state <- fh_acs_scoring_by_state %>% left_join(fh_acs_scoring_prevs_by_state, by='stateabbr')
heatmap.2(cor(fh_acs_scoring_by_state[,established_cols]))
pc_by_state <- prcomp(fh_acs_scoring_by_state[,established_cols],center=T, scale.=T)
summary(pc_by_state)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6
## Standard deviation 3.0624 1.6261 0.93665 0.84338 0.60304 0.55470
## Proportion of Variance 0.6252 0.1763 0.05849 0.04742 0.02424 0.02051
## Cumulative Proportion 0.6252 0.8015 0.85998 0.90740 0.93164 0.95216
## PC7 PC8 PC9 PC10 PC11 PC12
## Standard deviation 0.42119 0.40141 0.35899 0.26226 0.24022 0.20958
## Proportion of Variance 0.01183 0.01074 0.00859 0.00459 0.00385 0.00293
## Cumulative Proportion 0.96398 0.97473 0.98332 0.98790 0.99175 0.99468
## PC13 PC14 PC15
## Standard deviation 0.18279 0.15936 0.1450
## Proportion of Variance 0.00223 0.00169 0.0014
## Cumulative Proportion 0.99691 0.99860 1.0000
pc_by_state$rotation
## PC1 PC2 PC3 PC4
## CANCER_CrudePrev 0.10849184 -0.48687692 0.31398024 -0.411886788
## ARTHRITIS_CrudePrev 0.28762056 -0.17192838 0.25955907 -0.197225365
## STROKE_CrudePrev 0.29568787 0.16693564 -0.03297050 0.230510634
## CASTHMA_CrudePrev 0.20051809 0.05812432 0.69740669 0.425880543
## COPD_CrudePrev 0.30897788 -0.02911972 0.16850702 -0.046970195
## CHD_CrudePrev 0.30775724 -0.05746680 0.06694467 -0.135046388
## DIABETES_CrudePrev 0.29745592 0.16297635 -0.20301709 0.171436753
## KIDNEY_CrudePrev 0.28920461 0.13182034 -0.09141990 0.335045426
## BPMED_CrudePrev 0.27306413 -0.03869105 -0.27696012 0.006104727
## CSMOKING_CrudePrev 0.28252688 0.14581369 0.07492955 -0.172516093
## BPHIGH_CrudePrev 0.30551845 0.09775451 -0.17117986 0.014660662
## OBESITY_CrudePrev 0.26438155 0.23866718 -0.01158984 -0.344739791
## HIGHCHOL_CrudePrev 0.26508121 -0.07842503 -0.28536429 -0.290631223
## male_over_65_pct 0.09406325 -0.54440342 -0.23571636 0.238328022
## female_over_65_pct 0.13719840 -0.51210848 -0.12401379 0.323614019
## PC5 PC6 PC7 PC8
## CANCER_CrudePrev -0.092800184 0.17138413 -0.036601197 0.33053106
## ARTHRITIS_CrudePrev 0.122650282 -0.12151074 -0.071701223 0.12127297
## STROKE_CrudePrev -0.115817383 0.29426838 -0.119429075 0.13270871
## CASTHMA_CrudePrev 0.120709997 -0.37897765 0.038021789 -0.06411735
## COPD_CrudePrev 0.074447044 0.15528084 0.254328731 -0.16032606
## CHD_CrudePrev -0.002075941 0.39780839 0.198677079 0.09173957
## DIABETES_CrudePrev 0.168051107 0.06790125 0.008225568 0.16780350
## KIDNEY_CrudePrev 0.093091476 0.33141343 -0.013481235 0.26613298
## BPMED_CrudePrev -0.544064878 -0.47452712 0.436704927 0.32519894
## CSMOKING_CrudePrev -0.434077909 0.15799066 0.072228542 -0.70237792
## BPHIGH_CrudePrev 0.129793475 -0.23110414 -0.277026208 -0.09955023
## OBESITY_CrudePrev -0.125653438 -0.21319431 -0.650209620 0.11167196
## HIGHCHOL_CrudePrev 0.618208470 -0.21857354 0.277584039 -0.19754738
## male_over_65_pct -0.041764415 0.09842596 -0.299529602 -0.18383121
## female_over_65_pct -0.070108924 -0.14958462 -0.105783421 -0.16408911
## PC9 PC10 PC11 PC12
## CANCER_CrudePrev -0.33847058 0.27385133 -0.022327365 0.33731906
## ARTHRITIS_CrudePrev 0.22244333 -0.60346181 0.406571091 -0.22591943
## STROKE_CrudePrev -0.16795671 0.08438731 0.529350080 0.07680340
## CASTHMA_CrudePrev -0.19024150 0.07708237 -0.090885615 -0.03016615
## COPD_CrudePrev 0.43576304 0.43458153 -0.231497062 -0.26849595
## CHD_CrudePrev 0.32665747 -0.06317301 -0.062038440 -0.04336513
## DIABETES_CrudePrev 0.16708007 -0.16881185 -0.162410783 0.39273395
## KIDNEY_CrudePrev -0.38074297 -0.11567382 -0.305453027 -0.14729176
## BPMED_CrudePrev -0.05992350 0.04338464 0.006375307 -0.15078108
## CSMOKING_CrudePrev -0.25668548 -0.17977944 -0.008194147 0.14487659
## BPHIGH_CrudePrev 0.14631682 0.48295176 0.390584986 0.18808809
## OBESITY_CrudePrev 0.02950784 -0.03225527 -0.410628122 -0.12963899
## HIGHCHOL_CrudePrev -0.36860676 -0.03898386 -0.036720263 -0.01577031
## male_over_65_pct -0.13471336 0.08823552 0.032653899 -0.51681016
## female_over_65_pct 0.23728386 -0.18776488 -0.211935685 0.46391065
## PC13 PC14 PC15
## CANCER_CrudePrev 0.02199290 0.11447485 0.130167303
## ARTHRITIS_CrudePrev -0.15251017 0.26895818 0.078774664
## STROKE_CrudePrev -0.28876303 -0.52508602 0.134373204
## CASTHMA_CrudePrev 0.22722459 -0.14497817 -0.030962493
## COPD_CrudePrev -0.39636904 0.05755014 0.296967872
## CHD_CrudePrev 0.46525605 -0.27770811 -0.512941666
## DIABETES_CrudePrev 0.39440097 0.09678108 0.592727304
## KIDNEY_CrudePrev -0.23994099 0.42936186 -0.271286432
## BPMED_CrudePrev 0.03432547 -0.01059645 -0.002745499
## CSMOKING_CrudePrev 0.06213576 0.16059531 0.051847316
## BPHIGH_CrudePrev 0.12684531 0.40721317 -0.293562171
## OBESITY_CrudePrev -0.07203648 -0.25448972 -0.019739381
## HIGHCHOL_CrudePrev -0.07019707 -0.25693202 -0.041457898
## male_over_65_pct 0.32997346 -0.02800812 0.209073162
## female_over_65_pct -0.33825768 -0.14276478 -0.216611910
fh_acs_scoring_by_state$pc_1 <- as.numeric(pc_by_state$x[, 1])
fh_acs_scoring_by_state$pc_2 <- as.numeric(pc_by_state$x[, 2])
fh_acs_scoring_by_state <- fh_acs_scoring_by_state %>% mutate(risk_score_pc_1=as.numeric(rescale(pc_1,to=c(0,100))))
fh_acs_scoring_by_state <- fh_acs_scoring_by_state %>% mutate(risk_score_pc_2=as.numeric(rescale(-pc_2,to=c(0,100))))
fh_acs_scoring_by_state <- fh_acs_scoring_by_state %>% mutate(risk_score_both_pc=rescale(risk_score_pc_1*.61 + risk_score_pc_2*.24, to=c(0,100))) # rescale according the the contribution of the PC
fh_acs_scoring_by_state <- fh_acs_scoring_by_state %>% mutate(risk_score_all=rowSums(.[established_cols])) %>% mutate(risk_score_all=as.numeric(rescale(risk_score_all,to=c(0,100))))
#plot(fh_acs_scoring_by_state$risk_score_pc_1, fh_acs_scoring_by_state$risk_score_all)
#plot(fh_acs_scoring_by_state$risk_score_pc_2, fh_acs_scoring_by_state$risk_score_all)
fh_acs_scoring_by_state_distribute <- fh_acs_scoring_by_state %>% select(c(established_cols, "stateabbr", "risk_score_both_pc", "risk_score_pc_1", "risk_score_pc_2", "risk_score_all", "total_population"))
fh_acs_scoring_by_state_distribute <- fh_acs_scoring_by_state_distribute %>% mutate(rank_in_country=rank(risk_score_both_pc)) %>% mutate(risk_score = risk_score_both_pc)
DT::datatable(fh_acs_scoring_by_state_distribute %>% select(stateabbr, risk_score,risk_score_all))
write_rds(fh_acs_scoring_by_state, path = "fh_acs_covid_state_comm_score.rds")
write_csv(fh_acs_scoring_by_state_distribute, path="fh_acs_covid_state_comm_score.csv")